For this guiding question, I look only at 2024 data. Below, you’ll see on the each pilot practice (what schools hope to implement in the next 1-5 years) on the left and catalyst for innovation, or the reason(s) school leaders cite for innovating, across the top.
# Correlations between catalyst variables and reasons for innovating
catalysts <- full %>%
select(starts_with("catalyst_")) %>%
select(-starts_with("catalyst_key"), -contains("_other"))
pilots <- full %>%
select(starts_with("pilot_")) %>%
select(-contains("_other")) remove_zero_variance <- function(df) {
# Apply function to each column
non_constant_cols <- df[, apply(df, 2, sd) != 0]
return(non_constant_cols)
}
catalysts_clean <- remove_zero_variance(catalysts)
pilots_clean <- remove_zero_variance(pilots) #just one removed
reasons_and_futures <- cor(pilots_clean, catalysts_clean)
rfdf <- data.frame(reasons_and_futures)corrplot::corrplot(reasons_and_futures, method = "color", tl.col = "black", tl.cex = 0.3, c1.cex = 0.3, number.cex = 0.2, diag = TRUE)Kind of clunky. I’m going to modify to fix the labels manually and then expand the figure to better see the correlations.
# rename catalysts
rename_catalyst_mapping <- setNames(
dictionary$clean_labels[dictionary$variable_name %in% colnames(catalysts)],
dictionary$variable_name[dictionary$variable_name %in% colnames(catalysts)]
)
# rename pilots
rename_pilot_mapping <- setNames(
dictionary$clean_labels[dictionary$variable_name %in% colnames(pilots)],
dictionary$variable_name[dictionary$variable_name %in% colnames(pilots)]
)
# Correlations between catalyst variables and reasons for innovating
catalysts <- full %>%
select(starts_with("catalyst_")) %>%
select(-starts_with("catalyst_key"), -contains("_other")) %>%
rename_with(~ rename_catalyst_mapping[.x], .cols = everything())
pilots <- full %>%
select(starts_with("pilot_")) %>%
select(-contains("_other")) %>%
rename_with(~ rename_pilot_mapping[.x], .cols = everything())
catalysts_clean <- remove_zero_variance(catalysts)
pilots_clean <- remove_zero_variance(pilots) #just one removed
reasons_and_futures <- cor(pilots_clean, catalysts_clean)
rfdf <- data.frame(reasons_and_futures)corrplot::corrplot(reasons_and_futures, method = "color", tl.col = "black", tl.cex = 0.6, c1.cex = 0.3, number.cex = 0.2, diag = TRUE, cl.pos = "n")The following are related to each catalyst. Correlations above 0.15 are noted.
catalyst_all_years <- import(here("data/longitudinal", "longitudinal_data.csv")) %>%
select(year, school_id, starts_with("catalyst")) %>%
filter(year == 2021 | year == 2024)
catalyst_all_years_long <- catalyst_all_years %>%
select(-contains("_other"), -contains("_key")) %>%
pivot_longer(cols = contains("catalyst"),
names_to = "catalyst",
values_to = "selected",
names_prefix = "catalyst_")From Cycle 2, I had started to create this graph. This shows catalyst selection across schools.
library(ggrepel)
catalyst_all_years_long <- catalyst_all_years_long %>%
group_by(catalyst, year) %>%
summarize(total_selected = sum(selected))
label_positions <- catalyst_all_years_long %>%
group_by(catalyst) %>%
summarize(year = 2021, selected = first(total_selected))
catalyst_all_years_long %>%
ggplot(aes(x = year, y = total_selected, color = catalyst)) +
geom_line(linewidth = 1) +
geom_point(size = 3) +
geom_label_repel(data = label_positions, aes(y = selected, label = catalyst)) +
scale_x_continuous(breaks = c(2021, 2024)) +
labs(x = "Year",
y = "Number of Times Selected",
title = "Catalyst Selection by Year \n Across Schools") +
theme(legend.position = "none")Version using percentages is added here.
n_2021 = 232
n_2024 = 189
catalyst_all_years_long <- catalyst_all_years_long %>%
mutate(pct = case_when(year == 2021 ~ total_selected/n_2021,
year == 2024 ~ total_selected/n_2024))
label_positions <- catalyst_all_years_long %>%
group_by(catalyst) %>%
summarize(year = 2021, pct = first(pct))
catalyst_all_years_long %>%
ggplot(aes(x = year, y = pct, color = catalyst)) +
geom_line(linewidth = 1) +
geom_point(size = 3) +
geom_label_repel(data = label_positions, aes(y = pct, label = catalyst)) +
scale_x_continuous(breaks = c(2021, 2024)) +
scale_y_continuous(labels = scales::percent_format()) +
labs(x = "Year",
y = "Percent of Times Selected",
title = "Catalyst Selection by Year \n Across Schools") +
theme(legend.position = "none")What about schools who responded to the survey both years? So, looking within schools? Let’s narrow the sample and check that out.
catalyst_all_years_within <- catalyst_all_years %>%
filter(duplicated(school_id) | duplicated(school_id, fromLast = TRUE)) %>%
select(-contains("_other"), -contains("_key")) %>%
pivot_longer(cols = contains("catalyst"),
names_to = "catalyst",
values_to = "selected",
names_prefix = "catalyst_") %>%
group_by(catalyst, year) %>%
summarize(total_selected = sum(selected))
label_positions <- catalyst_all_years_within %>%
group_by(catalyst) %>%
summarize(year = 2021, selected = first(total_selected))
catalyst_all_years_within %>%
ggplot(aes(x = year, y = total_selected, color = catalyst)) +
geom_line(linewidth = 1) +
geom_point(size = 3) +
geom_label_repel(data = label_positions, aes(y = selected, label = catalyst)) +
scale_x_continuous(breaks = c(2021, 2024)) +
labs(x = "Year",
y = "Number of Times Selected",
title = "Catalyst Selection by Year \n Within Schools") +
theme(legend.position = "none")Now, in percentage. The total value here will be different than in
the across graph since not ever school answered every year.
Only 82 did.
n_within = 82
catalyst_all_years_within <- catalyst_all_years_within %>%
mutate(pct = total_selected/n_within)
label_positions <- catalyst_all_years_within %>%
group_by(catalyst) %>%
summarize(year = 2021, pct = first(pct))
catalyst_all_years_within %>%
ggplot(aes(x = year, y = pct, color = catalyst)) +
geom_line(linewidth = 1) +
geom_point(size = 3) +
geom_label_repel(data = label_positions, aes(y = pct, label = catalyst)) +
scale_x_continuous(breaks = c(2021, 2024)) +
scale_y_continuous(labels = scales::percent_format()) +
labs(x = "Year",
y = "Percent of Times Selected",
title = "Catalyst Selection by Year \n Within Schools") +
theme(legend.position = "none")Create a version similar to the most added practices figures.
catalyst_all_years_within_change <- catalyst_all_years_within %>%
pivot_wider(names_from = year,
values_from = c(total_selected, pct))
catalyst_all_years_within_change %>%
ggplot(aes(x = total_selected_2021, xend = total_selected_2024, y = reorder(catalyst, total_selected_2024), yend = catalyst)) +
geom_segment(color = "black", linetype = "dotted") +
geom_point(aes(x = total_selected_2021), color = "red") +
geom_point(aes(x = total_selected_2024), color = "blue") +
guides(col = guide_legend(nrow = 1, title = NULL)) +
bar_x_scale_count +
geom_text(
aes(x = (total_selected_2021 + total_selected_2024)/2, label = paste("Δ =", total_selected_2024 - total_selected_2021), color = factor(sign(total_selected_2024 - total_selected_2021))),
nudge_y = .3,
hjust = 0,
show.legend = FALSE
) +
scale_color_manual(
values = c("red", "blue"),
labels = c("2021", "2024")
) +
labs(
y = "Catalysts",
x = "Times Selected",
title = "Catalyst Selection from 2021 to 2024 Within Schools"
) +
theme(
panel.grid.major.y = element_blank(),
legend.position = "bottom",
axis.text.y = element_text(size = rel(0.6))
)How many schools selected just one catalyst in particular?
one_cat <- catalyst_all_years %>%
select(-contains("_other"), -contains("_key")) %>%
mutate(cat_select = rowSums(across(3:11))) %>%
filter(cat_select == 1) %>%
pivot_longer(cols = contains("catalyst"),
names_to = "catalyst",
values_to = "selected",
names_prefix = "catalyst_") %>%
filter(selected == 1) %>%
group_by(year, catalyst) %>%
summarise(count = n())one_cat %>%
ggplot(aes(x = count, y = catalyst, fill = as.factor(year))) +
geom_col(position = "dodge") +
scale_fill_manual(values = transcend_cols) +
labs(title = "Solo-Select Catalysts",
x = "Catalyst",
y = "Count",
legend = "Year")Also going to represent this information here in a change plot. Note, this is not going to be within schools since there is only once school that selected one catalyst each year. Also note, covid and student_agency were exclusive to 2021.
one_cat %>%
pivot_wider(names_from = "year",
values_from = "count") %>%
ggplot(aes(x = `2021`, xend = `2024`, y = reorder(catalyst, `2024`), yend = catalyst)) +
geom_segment(color = "black", linetype = "dotted") +
geom_point(aes(x = `2021`), color = "red") +
geom_point(aes(x = `2024`), color = "blue") +
geom_point(x = 1, y = "internal", color = "purple") +
geom_point(x = 1, y = "external", color = "purple") +
guides(col = guide_legend(nrow = 1, title = NULL)) +
# bar_x_scale_count +
geom_text(
aes(x = (`2021` + `2024`)/2 -1, label = paste("Δ =", `2024` - `2021`), color = factor(sign(`2024` - `2021`))),
nudge_y = .3,
hjust = 0,
show.legend = FALSE
) +
labs(
y = "Catalysts",
x = "Times Selected",
title = "Solo-Select Catalyst Selection \nfrom 2021 to 2024 Across Schools"
) +
theme(
panel.grid.major.y = element_blank(),
legend.position = "bottom",
axis.text.y = element_text(size = rel(0.6))
)cat_by_year <- catalyst_all_years %>%
select(year, school_id, starts_with("catalyst_key"), -contains("_other")) %>%
pivot_longer(cols = contains("catalyst"),
names_to = "catalyst_key",
values_to = "selected",
names_prefix = "catalyst_key_") %>%
pivot_wider(names_from = year,
values_from = selected) %>%
na.omit() %>% #omit schools that only answered one year
mutate(selected = case_when(`2021` == 0 & `2024` == 0 ~ "neither year",
`2021` == 0 & `2024` == 1 ~ "added",
`2021` == 1 & `2024` == 0 ~ "dropped",
`2021` == 1 & `2024` == 1 ~ "both years")) %>%
group_by(catalyst_key, selected) %>%
summarise(n = n())cat_by_year %>%
ggplot(aes(x = n, y = catalyst_key, fill = selected)) +
geom_col(position = "dodge") +
scale_fill_manual(values = transcend_cols) +
labs(title = "Catalyst Key Selection for Schools with Both Years of Data",
x = "Number of Schools Selecting",
y = "Catalyst",
legend = "Status")This is from Cycle 2. What if we looked at schools who only selected one catalyst?
one_cat_change <- catalyst_all_years %>%
select(-contains("_other"), -contains("_key")) %>%
mutate(cat_select = rowSums(across(3:11))) %>%
filter(cat_select == 1) %>%
pivot_longer(cols = contains("catalyst"),
names_to = "catalyst",
values_to = "selected",
names_prefix = "catalyst_") %>%
filter(selected == 1)Looks like there was just one school who meets this criteria. This school went from focusing on inequity to demographics.
I’m continuing to explore this question, this time with a vertical barchart of adds/drops only. Note again, these are schools who responded both years.
cat_by_year %>%
pivot_wider(names_from = "selected",
values_from = "n") %>%
mutate(dropped = -1*dropped) %>%
ggplot(aes(x = reorder(catalyst_key, -dropped))) +
geom_col(aes(y = added), fill = transcend_cols[1]) +
geom_col(aes(y = dropped), fill = transcend_cols[3]) +
geom_hline(yintercept = 0, linetype = 2) +
coord_flip() +
labs(title = "Catalyst Key Adds/Drops for \nSchools Answering Both Years",
y = "Drops (Negative) and Adds (Positive)",
x = "Catalyst")2024 only
First, am curious how the free response category responded. I generated a wordcloud of these for barriers below.
library(wordcloud)
library(tm)
responses <- barriers$barrier_other_text
# Create a text corpus
corpus <- Corpus(VectorSource(responses))
# Text preprocessing
corpus <- tm_map(corpus, content_transformer(tolower)) # Convert to lower case
corpus <- tm_map(corpus, removePunctuation) # Remove punctuation
corpus <- tm_map(corpus, removeNumbers) # Remove numbers
corpus <- tm_map(corpus, removeWords, stopwords("english")) # Remove stopwords
corpus <- tm_map(corpus, stripWhitespace) # Strip whitespace
# Create a document-term matrix
dtm <- TermDocumentMatrix(corpus)
# Convert the matrix to a dataframe
matrix <- as.matrix(dtm)
word_freqs <- sort(rowSums(matrix), decreasing=TRUE)
data <- data.frame(word=names(word_freqs), freq=word_freqs)
# Generate the wordcloud
set.seed(1234) # For reproducibility
wordcloud(words = data$word, freq = data$freq, min.freq = 1,
max.words=200, random.order=FALSE, rot.per=0.35,
colors=brewer.pal(8, "Dark2"))While we’re here, I’m also including a wordcloud for the catalysts for innovation here.
responses <- full$catalyst_other_text
# Create a text corpus
corpus <- Corpus(VectorSource(responses))
# Text preprocessing
corpus <- tm_map(corpus, content_transformer(tolower)) # Convert to lower case
corpus <- tm_map(corpus, removePunctuation) # Remove punctuation
corpus <- tm_map(corpus, removeNumbers) # Remove numbers
corpus <- tm_map(corpus, removeWords, stopwords("english")) # Remove stopwords
corpus <- tm_map(corpus, stripWhitespace) # Strip whitespace
# Create a document-term matrix
dtm <- TermDocumentMatrix(corpus)
# Convert the matrix to a dataframe
matrix <- as.matrix(dtm)
word_freqs <- sort(rowSums(matrix), decreasing=TRUE)
data <- data.frame(word=names(word_freqs), freq=word_freqs)
# Generate the wordcloud
set.seed(1234) # For reproducibility
wordcloud(words = data$word, freq = data$freq, min.freq = 1,
max.words=200, random.order=FALSE, rot.per=0.35,
colors=brewer.pal(8, "Dark2"))Back to barriers –
For the rest of the options, here is what leaders selected.
barriers <- barriers %>%
select(-contains("_other")) %>%
pivot_longer(cols = contains("barrier"),
names_to = "barrier",
values_to = "selected",
names_prefix = "barrier_") %>%
filter(selected == 1) %>%
group_by(barrier) %>%
summarize(n = n())barriers %>%
ggplot(aes(reorder(barrier, n), n)) +
geom_col(aes(fill = barrier)) +
scale_fill_manual(values = c(transcend_cols, transcend_cols2)) +
labs(title = "2024 Barriers to Sustainability",
x = "Barrier",
y = "Count") +
theme(legend.position = "none") +
coord_flip()